Packages Used

In [1]:
import plotly
from plotly.graph_objs import Scatter, Layout
import pandas as pd
import numpy as np
import locale
locale.setlocale(locale.LC_ALL, '')
from plotly.graph_objs import *

Set plotly to offline; produce all plots in the notebook

In [2]:
# Initialise plotly's notebook mode so that the later plotly.offline.iplot
# calls render their figures inline in this notebook.
# NOTE(review): per the plotly docs, connected=True is expected to load
# plotly.js from a CDN rather than embedding it, which needs internet access
# when the notebook is viewed -- confirm that is acceptable for "offline" use.
plotly.offline.init_notebook_mode(connected=True)

Import the best model's test set performance

Which does not contain independent variables

In [3]:
# Load the test-set performance of every model in the best notebook.
best_notebooks_test_perf = pd.read_csv("C:\\Users\\Jeremy Diaz\\Documents\\earth-analytics\\tornadoesr\\Complete_Workflow\\20_test_perf.csv")

# Keep only the rows of model number 6 (the best model) and renumber them
# from 0 so they can be lined up positionally with the test set later on.
is_best_model = best_notebooks_test_perf['model_number'] == 6
best_model_perf = (best_notebooks_test_perf
                   .loc[is_best_model]
                   .reset_index(drop = True))

Import the unprocessed data

To undo the data processing with the same values

In [4]:
# Load the unprocessed tornado data. The next cell takes the mean and standard
# deviation of its BEGIN_LAT, BEGIN_LON and log(DAMAGE_PROPERTY + 1) columns,
# which are then used to reverse the scaling of the test set and prediction grid.
unproc_tor_df = pd.read_csv("C:\\Users\\Jeremy Diaz\\Documents\\earth-analytics\\tornadoesr\\data\\raw\\tor_data_with_interact_effects.csv")

Storing those values

In [5]:
# Centre and spread of the raw latitude.
raw_lat = unproc_tor_df['BEGIN_LAT']
mean_lat = np.mean(raw_lat)
stand_dev_lat = np.std(raw_lat)

# Centre and spread of the raw longitude.
raw_lon = unproc_tor_df['BEGIN_LON']
mean_lon = np.mean(raw_lon)
stand_dev_lon = np.std(raw_lon)

# Property damage is summarised on the log(x + 1) scale; compute the
# transformed column once and reuse it for both statistics.
raw_log_dam = np.log(unproc_tor_df['DAMAGE_PROPERTY'] + 1)
mean_log_dam = np.mean(raw_log_dam)
stand_dev_log_dam = np.std(raw_log_dam)

Import the test set

To get location information

In [6]:
# Load the test set. Later cells read its DAMAGE_PROPERTY, BEGIN_LAT and
# BEGIN_LON columns and add the de-scaled truth, predictions and residuals to it.
# NOTE(review): the file name suggests zero-damage rows were removed -- confirm.
test_set = pd.read_csv("C:\\Users\\Jeremy Diaz\\Documents\\earth-analytics\\tornadoesr\\data\\raw\\tor_test_set_no_zeros.csv")

Undo the processing

In [7]:
# True damage: reverse the scaling (multiply by the std, add the mean), then
# reverse the log(x + 1) transform.
log_scale_true = (test_set['DAMAGE_PROPERTY'] * stand_dev_log_dam) + mean_log_dam
test_set['natural_scale_true'] = np.exp(log_scale_true) - 1

# Predicted damage: same back-transform applied to the best model's predictions.
# The assignment lines rows up by index label.
log_scale_pred = (best_model_perf['predicted_values'] * stand_dev_log_dam) + mean_log_dam
test_set['natural_scale_pred'] = np.exp(log_scale_pred) - 1

# Residual = prediction - truth, so positive values are overestimates.
test_set['natural_scale_resid'] = test_set['natural_scale_pred'] - test_set['natural_scale_true']

# Coordinates: reverse the scaling in place.
# NOTE: the columns are overwritten, so running this cell twice un-scales twice.
for coord_column, coord_mean, coord_sd in (('BEGIN_LAT', mean_lat, stand_dev_lat),
                                           ('BEGIN_LON', mean_lon, stand_dev_lon)):
    test_set[coord_column] = (test_set[coord_column] * coord_sd) + coord_mean

This will tell whether each prediction was an over- or underestimate, get the absolute difference between predicted and true values (in log-10 scale), then apply the corresponding sign for that difference (negative for underestimates and positive for overestimates).

In [8]:
# Signed log-10 magnitude of every residual, computed on the whole column at
# once instead of row by row.
natural_resid = test_set['natural_scale_resid']

# +1 for overestimates (residual > 0), -1 for everything else -- the same rule
# the original loop applied, including -1 for an exact zero residual.
sign_list = np.where(natural_resid > 0, 1, -1).tolist()

test_set['natural_resid_sign'] = sign_list

# Use log10(|resid| + 1) rather than log10(|resid|):
#   * a residual of exactly 0 would otherwise give -inf, which also breaks the
#     colour range derived from this column's min/max;
#   * a residual with |resid| < 1 would otherwise give a negative log, which
#     flips the direction once multiplied by the sign.
# The + 1 matches the log10(x + 1) used for the prediction grid.
test_set['log_10_abs_resid'] = np.log10(abs(natural_resid) + 1)

test_set['log_10_resid_direction'] = test_set['natural_resid_sign'] * test_set['log_10_abs_resid']

Getting a clean label for each point

In [9]:
# Hover label for each test-set point: the residual as a whole-dollar amount
# with locale-specific thousands grouping, e.g. "$1,234" in an English locale.
# locale.format_string replaces locale.format, which was deprecated in
# Python 3.7 and removed in Python 3.12.
labels = ["$" + locale.format_string("%d", resid_value, grouping = True)
          for resid_value in test_set['natural_scale_resid']]

The Test Set Residual Map

This will produce the map, showing where and by how much the model was wrong. Dark blues imply strong overestimates, while dark reds imply strong underestimates. Lighter colors indicate where the model did relatively well.

In [10]:
# Marker colour is the signed log-10 residual. The colour scale runs from dark
# red at the lowest value to dark blue at the highest value.
resid_direction = test_set['log_10_resid_direction']

data = [dict(type = "scattergeo",
             lon = test_set["BEGIN_LON"],
             lat = test_set["BEGIN_LAT"],
             text = labels,
             marker = dict(color = resid_direction,
                           colorscale = [[0.0, 'rgb(165,0,38)'],
                                         [0.1111111111111111, 'rgb(215,48,39)'],
                                         [0.2222222222222222, 'rgb(244,109,67)'],
                                         [0.3333333333333333, 'rgb(253,174,97)'],
                                         [0.4444444444444444, 'rgb(254,224,144)'],
                                         [0.5555555555555556, 'rgb(224,243,248)'],
                                         [0.6666666666666666, 'rgb(171,217,233)'],
                                         [0.7777777777777778, 'rgb(116,173,209)'],
                                         [0.8888888888888888, 'rgb(69,117,180)'],
                                         [1.0, 'rgb(49,54,149)']],
                           size = 5.25,
                           line = dict(width = 0.375,
                                       color = 'rgba(0, 0, 0)'),
                           # The colour range must span min..max. It previously
                           # used .min() for both ends, which collapsed the
                           # range to a single value.
                           cmin = resid_direction.min(),
                           cmax = resid_direction.max(),
                           colorbar = dict(title = 'Direction and Magnitude of Residual')))]

# Light base map, with the view limited to the given longitude/latitude ranges.
layout = dict(geo = dict(scope = 'north america',
                         showland = True,
                         landcolor = "rgb(255, 255, 255)",
                         subunitcolor = "rgb(0, 0, 0)",
                         countrycolor = "rgb(0, 0, 0)",
                         showlakes = True,
                         showocean = True,
                         lakecolor = "rgb(230, 230, 230)",
                         oceancolor = "rgb(230, 230, 230)",
                         showsubunits = True,
                         showcountries = True,
                         resolution = 50,
                         lonaxis = dict(showgrid = True,
                                        gridwidth = 0.05,
                                        range= [-125.0, -70.0],
                                        dtick = 5),
                         lataxis = dict(showgrid = True,
                                        gridwidth = 0.05,
                                        range= [23.0, 50.0],
                                        dtick = 5)),
              title = 'Map of Test Set Residuals')

fig1 = {'data':data,
       'layout':layout}

plotly.offline.iplot(fig1)

Import the predictions grid

In [11]:
# Load the grid of model predictions. Later cells read its MONTH,
# DAMAGE_PROPERTY, BEGIN_LAT and BEGIN_LON columns.
predictions_2018 = pd.read_csv("C:\\Users\\Jeremy Diaz\\Documents\\earth-analytics\\tornadoesr\\Complete_Workflow\\grid_with_predictions.csv")

Making the month variable easier to work with

In [12]:
# Replace each distinct MONTH value with an integer code starting at 1
# (factorize's codes start at 0, hence the + 1).
# NOTE(review): factorize numbers values in order of first appearance, so
# code 1 == January only if the grid's rows are in calendar order -- confirm.
predictions_2018['MONTH'] = pd.factorize(predictions_2018.MONTH)[0] + 1

Undoing the processing

In [13]:
# Predicted damage: reverse the scaling (multiply by the std, add the mean),
# then reverse the log(x + 1) transform.
# NOTE: DAMAGE_PROPERTY is overwritten in place, so this cell must run only once.
log_scale_damage = (predictions_2018['DAMAGE_PROPERTY'] * stand_dev_log_dam) + mean_log_dam
predictions_2018['DAMAGE_PROPERTY'] = np.exp(log_scale_damage) - 1

# Log-10 dollars, used to colour the prediction map.
predictions_2018['log_10_dam'] = np.log10(predictions_2018['DAMAGE_PROPERTY'] + 1)

# Coordinates: reverse the scaling in place.
for coord_column, coord_mean, coord_sd in (('BEGIN_LAT', mean_lat, stand_dev_lat),
                                           ('BEGIN_LON', mean_lon, stand_dev_lon)):
    predictions_2018[coord_column] = (predictions_2018[coord_column] * coord_sd) + coord_mean

Getting the labels

In [14]:
# Hover label for each grid cell: the predicted damage as a whole-dollar amount
# with locale-specific thousands grouping, e.g. "$1,234" in an English locale.
# locale.format_string replaces locale.format, which was deprecated in
# Python 3.7 and removed in Python 3.12.
labels2 = ["$" + locale.format_string("%d", damage_value, grouping = True)
           for damage_value in predictions_2018['DAMAGE_PROPERTY']]

predictions_2018['labels'] = labels2

Separating months so that the map can have a dropdown menu selection

In [15]:
# One sub-frame per month code (1..12), unpacked into the twelve names that the
# map cell uses for its traces.
(jan_data, feb_data, mar_data, apr_data,
 may_data, jun_data, jul_data, aug_data,
 sep_data, oct_data, nov_data, dec_data) = (
    predictions_2018[predictions_2018['MONTH'] == month_number]
    for month_number in range(1, 13)
)

The 2018 Prediction Map

This produces the map of model predictions for the 15th day of each month of 2018. Yellows indicate massive property damage, greens and blues indicate intermediate damage, and purples indicate relatively low property damage. The buttons beside the map let you select which month to view.

In [16]:
def _month_trace(month_name, month_df):
    """Build the scattergeo trace for one month of the prediction grid.

    Parameters
    ----------
    month_name : str
        Name given to the trace, e.g. "January".
    month_df : pandas.DataFrame
        Rows of ``predictions_2018`` for that month. The columns
        ``BEGIN_LON``, ``BEGIN_LAT``, ``labels`` and ``log_10_dam`` are read.

    Returns
    -------
    dict
        A plotly trace specification coloured by ``log_10_dam``.
    """
    log_damage = month_df["log_10_dam"]

    return {"lon": month_df["BEGIN_LON"],
            "lat": month_df["BEGIN_LAT"],
            "name": month_name,
            "text": month_df["labels"],
            "marker": {"color": log_damage,
                       "colorscale": "Viridis",
                       "size": 2.25,
                       "symbol": "square",
                       # The colour range must span this month's min..max. It
                       # previously used .min() for both ends, which collapsed
                       # the range to a single value.
                       "cmin": log_damage.min(),
                       "cmax": log_damage.max(),
                       "colorbar": dict(title = 'Magnitude of Predicted Property Damage')},
            "type": "scattergeo",
            # NOTE(review): every trace starts visible, so all twelve months
            # (and their colorbars) are drawn on top of each other until a
            # button is pressed. Consider showing only January initially.
            "visible": True}


# Month name paired with that month's slice of the grid, in calendar order.
monthly_frames = [("January", jan_data), ("February", feb_data),
                  ("March", mar_data), ("April", apr_data),
                  ("May", may_data), ("June", jun_data),
                  ("July", jul_data), ("August", aug_data),
                  ("September", sep_data), ("October", oct_data),
                  ("November", nov_data), ("December", dec_data)]

traces = [_month_trace(month_name, month_df)
          for month_name, month_df in monthly_frames]

# Keep the individual trace names available, as before.
(trace1, trace2, trace3, trace4,
 trace5, trace6, trace7, trace8,
 trace9, trace10, trace11, trace12) = traces

# A plain list of traces, like fig1 uses; this replaces the deprecated
# plotly.graph_objs.Data wrapper.
data2 = list(traces)

# Dark base map, with the view limited to the given longitude/latitude ranges.
layout2 = dict(geo = dict(scope = 'north america',
                          showland = True,
                          landcolor = "rgb(0, 0, 0)",
                          subunitcolor = "rgb(255, 255, 255)",
                          countrycolor = "rgb(255, 255, 255)",
                          showlakes = True,
                          showocean = True,
                          lakecolor = "rgb(23, 23, 23)",
                          oceancolor = "rgb(23, 23, 23)",
                          showsubunits = True,
                          showcountries = True,
                          resolution = 50,
                          lonaxis = dict(showgrid = True,
                                         gridwidth = 0.05,
                                         range= [-125.0, -70.0],
                                         dtick = 5),
                          lataxis = dict(showgrid = True,
                                         gridwidth = 0.05,
                                         range= [23.0, 50.0],
                                         dtick = 5)),
               title = 'Model Predictions for 2018')

# One button per month. Pressing it restyles 'visible' so that only the trace
# at that month's position is shown and the other eleven are hidden.
updatemenus = [{'buttons': [{'args': ['visible',
                                      [position == shown_position
                                       for position in range(len(monthly_frames))]],
                             'label': 'Show ' + month_name,
                             'method': 'restyle'}
                            for shown_position, (month_name, _month_df)
                            in enumerate(monthly_frames)],
                'type': 'buttons'}]

layout2['updatemenus'] = updatemenus

fig2 = {'data':data2,
       'layout':layout2}

plotly.offline.iplot(fig2)